library(tidyverse)
library(haven)
library(readxl)

#### Set working directory to parent directory (above 'code')

# Define a plot theme

# Base theme shared by the figures in this script: legend hidden, unfilled
# panel with grey major gridlines only, black axis lines, size-16 axis type.
theme_custom <- theme(
  # Legend (hidden, but its text sizes are still set)
  legend.position = "none",
  legend.title = element_text(size = 16),
  legend.text = element_text(size = 12),
  # Panel background and gridlines
  panel.background = element_rect(fill = NA),
  panel.ontop = FALSE,
  panel.grid.major.x = element_line(colour = "grey80"),
  panel.grid.major.y = element_line(colour = "grey80"),
  panel.grid.minor.x = element_blank(),
  panel.grid.minor.y = element_blank(),
  # Axes
  axis.line = element_line(size = 1, colour = "black"),
  axis.title = element_text(size = 16),
  axis.text = element_text(size = 16)
)

# Large-type variant: same theme with the `title` and `axis.text` elements
# set to size 24.
theme_custom_largetype <- theme_custom +
  theme(
    title = element_text(size = 24),
    axis.text = element_text(size = 24)
  )

#### Load in Roger and Rowan Informals panel dataset ####


# Read the panel and recode every value outside [0, 1] in the columns
# acd through zc to NA (presumably negative values are missing-data codes;
# confirm against the codebook). A single across() pass replaces the two
# superseded mutate_at() passes; NA inputs stay NA either way.
informals_v3 <- read_dta("data/informals_v3.dta") %>%
  mutate(across(acd:zc, ~ ifelse(. < 0 | . > 1, NA, .)))

#### Load in merged dataset: Roger and Rowan, alongside Vabulas and Snidal ####

# Merged sheet covering the Roger and Rowan and the Vabulas and Snidal lists
informals_merge_vs_rr <- read_excel("data/informals_merge_vs_rr.xlsx")

# Lower-cased `io_abb_rr` abbreviations of the rows with match >= 1;
# rows without an abbreviation are discarded.
informals_union <- informals_merge_vs_rr %>%
  filter(match >= 1) %>%
  select(io_abb_rr) %>%
  mutate(io_abb_rr = tolower(io_abb_rr)) %>%
  filter(!is.na(io_abb_rr))

# Quick check: dimensions of the panel restricted to the identifier columns
# plus the columns named by those abbreviations.
dim(
  informals_v3[
    ,
    c("state_name", "ccode", "year", "active", informals_union$io_abb_rr)
  ]
)

# Row-wise sum, per (ccode, year), over the IO columns listed in
# `informals_union`, restricted to rows with active == 1 and year > 1964.
# NA entries are ignored in the sum. The result is returned ungrouped.
informals_sum_union <- informals_v3 %>%
  select(
    state_name, ccode, year, active,
    all_of(informals_union$io_abb_rr)
  ) %>%
  filter(active == 1, year > 1964) %>%
  # Pick the IO columns by name instead of a hard-coded `apf:zc` range, so the
  # sum does not depend on the order in which the spreadsheet lists the IOs.
  mutate(across(all_of(informals_union$io_abb_rr), as.numeric)) %>%
  group_by(ccode, year) %>%
  summarize(
    informals_sum_union = rowSums(
      across(all_of(informals_union$io_abb_rr)),
      na.rm = TRUE
    ),
    .groups = "drop"
  )
  
# Row-wise sum, per (ccode, year), over all columns from acd through zc,
# restricted to rows with active == 1 and year > 1964. NA entries are ignored
# in the sum. The result is returned ungrouped.
informals_sum_full <- informals_v3 %>%
  filter(active == 1, year > 1964) %>%
  mutate(across(acd:zc, as.numeric)) %>%
  group_by(ccode, year) %>%
  summarize(
    informals_sum_full = rowSums(across(acd:zc), na.rm = TRUE),
    .groups = "drop"
  )

# Plot correlations: for each year, the Spearman correlation between the
# union-based sum and the full sum, computed on complete pairs only.
p1 <- full_join(
  informals_sum_full, informals_sum_union,
  by = c("ccode", "year")
) %>%
  group_by(year) %>%
  summarize(
    correlation = cor(
      x = informals_sum_union,
      y = informals_sum_full,
      method = "spearman",
      use = "complete.obs"
    )
  ) %>%
  ggplot(aes(x = year, y = correlation)) +
  geom_line() +
  geom_point() +
  labs(x = "Year", y = "Spearman correlation coefficient") +
  scale_x_continuous(limits = c(1965, 2010), breaks = seq(1970, 2010, 10)) +
  scale_y_continuous(limits = c(0.5, 1), breaks = seq(0, 1, .1)) +
  theme_custom
p1

# Pass the plot explicitly instead of relying on last_plot().
ggsave(
  "figures/figure_informals_rr_vs_correlation.pdf",
  plot = p1,
  units = "in", height = 6, width = 8
)

# Plot scatter: full sum against union-based sum for four snapshot years.

# Joined (ccode, year) sums, built once and reused for every panel.
informals_sum_joined <- full_join(
  informals_sum_full, informals_sum_union,
  by = c("ccode", "year")
)

# Build the scatter panel for one year.
#   data:       joined data with columns year, informals_sum_union and
#               informals_sum_full
#   panel_year: year to keep; also used as the panel title
# Returns a ggplot object.
plot_membership_scatter <- function(data, panel_year) {
  # Re-seed immediately before each panel is built, as the original
  # per-panel code did, so the jitter is reproducible across runs.
  set.seed(1)
  data %>%
    filter(year == panel_year) %>%
    ggplot(aes(x = informals_sum_union, y = informals_sum_full)) +
    # 45-degree reference line: points on it have equal sums
    geom_abline(slope = 1, intercept = 0) +
    # Horizontal jitter only
    geom_jitter(alpha = 0.7, width = 0.1, height = 0, size = 2.5) +
    scale_x_continuous(limits = c(0, 30), breaks = seq(0, 30, 5)) +
    scale_y_continuous(limits = c(0, 80), breaks = seq(0, 80, 10)) +
    labs(x = "", y = "", title = as.character(panel_year)) +
    theme_custom_largetype
}

p2a <- plot_membership_scatter(informals_sum_joined, 1965)
p2b <- plot_membership_scatter(informals_sum_joined, 1985)
p2c <- plot_membership_scatter(informals_sum_joined, 1995)
p2d <- plot_membership_scatter(informals_sum_joined, 2010)

# 2 x 2 layout with shared axis labels in place of per-panel ones
scatter_fouryears <- gridExtra::grid.arrange(
  p2a, p2b, p2c, p2d,
  nrow = 2, ncol = 2,
  bottom=grid::textGrob("Informal IO membership (V&S)",
                        gp=grid::gpar(fontsize=30,font=8)),
  left=grid::textGrob("Informal IO memberships (R&R)", rot = 90,
                      gp=grid::gpar(fontsize=30,font=8)))

ggsave("figures/figure_informals_membership_fouryears.pdf",
       plot = scatter_fouryears,
       width = 16, height = 12, units = "in")


